library(datasauRus)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.3.3     ✔ purrr   0.3.4
## ✔ tibble  3.1.0     ✔ dplyr   1.0.5
## ✔ tidyr   1.1.3     ✔ stringr 1.4.0
## ✔ readr   1.4.0     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Pull the "dino" member out of the Datasaurus Dozen. Only the two coordinate
# columns are kept; the `dataset` label is constant once the rows are filtered.
dino <- datasaurus_dozen %>%
  filter(dataset == "dino") %>%
  select(x, y)
dino
## # A tibble: 142 x 2
##        x     y
##    <dbl> <dbl>
##  1  55.4  97.2
##  2  51.5  96.0
##  3  46.2  94.5
##  4  42.8  91.4
##  5  40.8  88.3
##  6  38.7  84.9
##  7  35.6  79.9
##  8  33.1  77.6
##  9  29.0  74.5
## 10  26.2  71.4
## # … with 132 more rows
# Scatterplot of the dino coordinates.
ggplot(dino, aes(x = x, y = y)) +
  geom_point()

library(tidyverse)
# Staff head-count per department; counts are stored as integers.
dept <- c("Physics", "Mathematics", "Statistics", "Computer Science")
nstaff <- c(12L, 8L, 20L, 23L)
# `dept` is auto-named from the variable; `nstaff` is exposed as `count`.
sci_tbl <- tibble(dept, count = nstaff)
sci_tbl
## # A tibble: 4 x 2
##   dept             count
##   <chr>            <int>
## 1 Physics             12
## 2 Mathematics          8
## 3 Statistics          20
## 4 Computer Science    23
# Stacked bar chart in base graphics. as.matrix() turns the counts into a
# one-column matrix, so barplot() stacks the four departments in a single bar.
# The legend argument is written out in full (`legend.text`) instead of
# relying on partial matching of `legend`.
barplot(as.matrix(sci_tbl$count),
  legend.text = sci_tbl$dept)

# Base-graphics pie chart: one slice per department, sized by staff count.
with(sci_tbl, pie(count, labels = dept))

library(ggplot2)
# Stacked bar: mapping x to the constant "" puts every department in one bar,
# with segment heights taken directly from `count` (stat = "identity").
ggplot(data = sci_tbl) +
  geom_bar(
    aes(x = "", y = count, fill = dept),
    stat = "identity"
  )

# The same stacked bar drawn in polar coordinates with y mapped to the angle,
# which turns the single bar into a pie chart.
ggplot(data = sci_tbl) +
  geom_bar(
    aes(x = "", y = count, fill = dept),
    stat = "identity"
  ) +
  coord_polar(theta = "y") #<<

# One bar per department, built with the low-level layer() call so that the
# geom, stat and position are all named explicitly.
ggplot(data = sci_tbl, mapping = aes(x = dept, y = count)) +
  layer(geom = "bar", stat = "identity", position = "identity")

# Base plot object reused below: data and x/y mappings only, no layers yet.
p <- ggplot(sci_tbl, aes(x = dept, y = count)) #<<
p

# Bars whose heights are taken from `count` as-is.
p + 
  geom_bar(stat = "identity")

# geom_col() is the shorthand for geom_bar(stat = "identity").
p + 
  geom_col()

# Same mappings, drawn as points instead of bars.
p +
  geom_point()

# Vertical segments running from 0 up to `count` at each department.
p +
  geom_segment(aes(xend = dept, y = 0, yend = count))

# Points and segments combined as two layers (a lollipop-style chart).
p +
  geom_point() +
  geom_segment(aes(xend = dept, y = 0, yend = count))

# Expand the summary table to one row per staff member: each department row is
# repeated `count` times and the `count` column itself is dropped.
sci_tbl0 <- sci_tbl %>%
  uncount(weights = count)
sci_tbl0
## # A tibble: 63 x 1
##    dept   
##    <chr>  
##  1 Physics
##  2 Physics
##  3 Physics
##  4 Physics
##  5 Physics
##  6 Physics
##  7 Physics
##  8 Physics
##  9 Physics
## 10 Physics
## # … with 53 more rows
# Pre-summarised data: bar heights come straight from the `count` column.
ggplot(sci_tbl, aes(x = dept, y = count)) +
  geom_bar(stat = "identity")

# One row per staff member: stat = "count" tallies the rows in each department,
# so no y aesthetic is supplied.
ggplot(sci_tbl0, aes(x = dept)) +
  geom_bar(stat = "count")

# `colour` mapped inside aes(): the bar outline varies by department.
p +
  geom_col(aes(colour = dept))

# `fill` mapped inside aes(): the bar interior varies by department.
p +
  geom_col(aes(fill = dept))

# `fill` set outside aes(): a single constant colour for every bar.
p +
  geom_col(fill = "#756bb1")

# Mapped fill combined with a constant black outline.
p +
  geom_col(aes(fill = dept), colour = "#000000")

# Point size mapped to the staff count.
p +
  geom_point(aes(size = count))

# Filled bars drawn in polar coordinates with y mapped to the angle.
p +
  geom_col(aes(fill = dept)) +
  coord_polar(theta = "y") #<<

# Swap the default theme for the built-in black-and-white theme.
p +
  geom_col(aes(fill = dept)) +
  theme_bw() #<<

# ggthemes supplies additional complete themes, e.g. theme_economist().
library(ggthemes)
p +
  geom_col(aes(fill = dept)) +
  theme_economist() #<<

# Tweak a single theme element: rotate the x-axis labels by 30 degrees.
p +
  geom_col(aes(fill = dept)) +
  theme(axis.text.x = element_text(angle = 30, vjust = 0.1))

# Print `mpg`, the fuel-economy example dataset that ships with ggplot2.
mpg
## # A tibble: 234 x 11
##    manufacturer model    displ  year   cyl trans   drv     cty   hwy fl    class
##    <chr>        <chr>    <dbl> <int> <int> <chr>   <chr> <int> <int> <chr> <chr>
##  1 audi         a4         1.8  1999     4 auto(l… f        18    29 p     comp…
##  2 audi         a4         1.8  1999     4 manual… f        21    29 p     comp…
##  3 audi         a4         2    2008     4 manual… f        20    31 p     comp…
##  4 audi         a4         2    2008     4 auto(a… f        21    30 p     comp…
##  5 audi         a4         2.8  1999     6 auto(l… f        16    26 p     comp…
##  6 audi         a4         2.8  1999     6 manual… f        18    26 p     comp…
##  7 audi         a4         3.1  2008     6 auto(a… f        18    27 p     comp…
##  8 audi         a4 quat…   1.8  1999     4 manual… 4        18    26 p     comp…
##  9 audi         a4 quat…   1.8  1999     4 auto(l… 4        16    25 p     comp…
## 10 audi         a4 quat…   2    2008     4 manual… 4        20    28 p     comp…
## # … with 224 more rows
# Scatterplot of `displ` against `cty`, coloured by `drv`; reused for the
# faceting examples below.
p_mpg <- ggplot(mpg, aes(displ, cty)) + 
  geom_point(aes(colour = drv))
p_mpg

# One panel row per value of `drv`.
p_mpg +
  facet_grid(rows = vars(drv))

  # formula equivalent: facet_grid(drv ~ .)
# One panel column per value of `drv`.
p_mpg +
  facet_grid(cols = vars(drv))

  # formula equivalent: facet_grid(~ drv)
# Grid of panels: `drv` down the rows, `cyl` across the columns.
p_mpg +
  facet_grid(rows = vars(drv), cols = vars(cyl))

  # formula equivalent: facet_grid(drv ~ cyl)
# One panel per drv/cyl combination, wrapped into three columns.
p_mpg +
  facet_wrap(vars(drv, cyl), ncol = 3)

  # formula equivalent: facet_wrap(~ drv + cyl, ncol = 3)
# Read the Vega movies example data. simplifyVector = TRUE collapses the JSON
# records into a data frame, which is then converted to a tibble.
movies <- jsonlite::read_json(
  "https://vega.github.io/vega-editor/app/data/movies.json",
  simplifyVector = TRUE
) %>%
  as_tibble()
movies
## # A tibble: 3,201 x 16
##    Title     US_Gross Worldwide_Gross US_DVD_Sales Production_Budg… Release_Date
##    <chr>        <int>           <dbl>        <int>            <int> <chr>       
##  1 The Land…   146083          146083           NA          8000000 12-Jun-98   
##  2 First Lo…    10876           10876           NA           300000 7-Aug-98    
##  3 I Marrie…   203134          203134           NA           250000 28-Aug-98   
##  4 Let's Ta…   373615          373615           NA           300000 11-Sep-98   
##  5 Slam       1009819         1087521           NA          1000000 9-Oct-98    
##  6 Mississi…    24551         2624551           NA          1600000 15-Jan-99   
##  7 Following    44705           44705           NA             6000 4-Apr-99    
##  8 Foolish    6026908         6026908           NA          1600000 9-Apr-99    
##  9 Pirates    1641825         6341825           NA         40000000 1-Jul-86    
## 10 Duel in … 20400000        20400000           NA          6000000 31-Dec-46   
## # … with 3,191 more rows, and 10 more variables: MPAA_Rating <chr>,
## #   Running_Time_min <int>, Distributor <chr>, Source <chr>, Major_Genre <chr>,
## #   Creative_Type <chr>, Director <chr>, Rotten_Tomatoes_Rating <int>,
## #   IMDB_Rating <dbl>, IMDB_Votes <int>
# Per-column overview of `movies`: missingness, distinct values and, for the
# numeric columns, summary statistics with a small histogram.
skimr::skim(movies)
## Data summary
## Name movies
## Number of rows 3201
## Number of columns 16
## _______________________
## Column type frequency:
## character 8
## numeric 8
## ________________________
## Group variables None
##
## Variable type: character
##
## skim_variable n_missing complete_rate min max empty n_unique whitespace
## Title 1 1.00 1 66 0 3176 0
## Release_Date 7 1.00 8 11 0 1603 0
## MPAA_Rating 605 0.81 1 9 0 7 0
## Distributor 232 0.93 3 33 0 174 0
## Source 365 0.89 6 29 0 18 0
## Major_Genre 275 0.91 5 19 0 12 0
## Creative_Type 446 0.86 7 23 0 9 0
## Director 1331 0.58 7 27 0 550 0
##
## Variable type: numeric
##
## skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
## US_Gross 7 1.00 44002085.16 62555311.39 0.0 5493221.2 22019465.5 56091761.5 760167650.0 ▇▁▁▁▁
## Worldwide_Gross 7 1.00 85343400.14 149947342.89 0.0 8031285.2 31168926.5 97283797.0 2767891499.0 ▇▁▁▁▁
## US_DVD_Sales 2637 0.18 34901546.82 45895121.60 618454.0 9906210.8 20331557.5 37794215.8 352582053.0 ▇▁▁▁▁
## Production_Budget 1 1.00 31069171.45 35585913.44 218.0 6575000.0 20000000.0 42000000.0 300000000.0 ▇▁▁▁▁
## Running_Time_min 1992 0.38 110.19 20.17 46.0 95.0 107.0 121.0 222.0 ▁▇▃▁▁
## Rotten_Tomatoes_Rating 880 0.73 54.34 28.08 1.0 30.0 55.0 80.0 100.0 ▅▆▆▇▇
## IMDB_Rating 213 0.93 6.28 1.25 1.4 5.6 6.4 7.2 9.2 ▁▁▅▇▂
## IMDB_Votes 213 0.93 29908.64 44937.58 18.0 4828.5 15106.0 35810.5 519541.0 ▇▁▁▁▁
# IMDB rating against Rotten Tomatoes rating with a GAM smooth on top. Small,
# semi-transparent points soften overplotting; aspect.ratio = 1 makes the
# panel square.
ggplot(movies, aes(x = IMDB_Rating, y = Rotten_Tomatoes_Rating)) +
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "gam") +
  theme(aspect.ratio = 1)
## `geom_smooth()` using formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 941 rows containing non-finite values (stat_smooth).
## Warning: Removed 941 rows containing missing values (geom_point).

# The same two ratings as hexagonal bins, an alternative to overplotted points.
ggplot(movies, aes(x = IMDB_Rating, y = Rotten_Tomatoes_Rating)) +
  geom_hex() +
  theme(aspect.ratio = 1)
## Warning: Removed 941 rows containing non-finite values (stat_binhex).

# Number of movies per genre; mapping the genre to y gives horizontal bars.
ggplot(movies, aes(y = Major_Genre)) +
  geom_bar()

# IMDB rating distribution within each genre, as horizontal boxplots.
ggplot(movies) +
  geom_boxplot(aes(x = IMDB_Rating, y = Major_Genre))
## Warning: Removed 213 rows containing non-finite values (stat_boxplot).

# One IMDB rating density curve per genre, all drawn in a single panel and
# distinguished by fill colour.
ggplot(movies) +
  geom_density(aes(x = IMDB_Rating, fill = Major_Genre))
## Warning: Removed 213 rows containing non-finite values (stat_density).

library(ggridges)
# Ridgeline plot: one IMDB rating density per genre, laid out along the y axis.
ggplot(movies, aes(x = IMDB_Rating, y = Major_Genre)) +
  geom_density_ridges(aes(fill = Major_Genre))
## Picking joint bandwidth of 0.356
## Warning: Removed 213 rows containing non-finite values (stat_density_ridges).

# The per-genre densities again, but with each genre in its own panel.
ggplot(movies) +
  geom_density(aes(x = IMDB_Rating, fill = Major_Genre)) +
  facet_wrap(vars(Major_Genre))
## Warning: Removed 213 rows containing non-finite values (stat_density).